
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>LZW</title>
<meta name="description" content="LZW" />
<meta name="keywords" content="lzw" />
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<link rel="stylesheet" media="all" href="/style/style.css" type="text/css" />
</head>
<body>
<?php
//include_once '../../ants/lib_functions.php';
//include_once "../../../dbconnect.php";
//logAccess();
?>
<?php
    /* Left-pads every string in $array with "0" characters so that all
        entries end up as long as the longest one.
        The array is modified in place (it is taken by reference);
        the common length is returned (0 for an empty array).
    */
    function pad(&$array) {
        // First pass: find the widest entry.
        $widest = 0;
        foreach ($array as $entry) {
            $widest = max($widest, strlen($entry));
        }

        // Second pass: bring every entry up to that width.
        foreach ($array as &$entry) {
            $entry = str_pad($entry, $widest, "0", STR_PAD_LEFT);
        }
        unset($entry); // drop the dangling reference left by foreach

        return $widest;
    }


// When the query string contains "source", show this file's highlighted
// source instead of the demo page. The key is quoted: a bare word is looked
// up as a constant, which is a notice on old PHP and a fatal Error on PHP 8.
if(isset($_GET['source'])) {
    highlight_file(__FILE__);
}else{
?>
<b>LZW</b>
<br /><br />
<?php
// Fallback for PHP builds that lack the native str_split().
if(!function_exists('str_split')){

    /* Splits $str into consecutive pieces of $nr bytes (the last piece may
       be shorter) and returns them as a list.

       The pieces are cut with substr() rather than by inserting a "-l-"
       marker and splitting on it: the marker approach mangled any input that
       already contained "-l-" and depended on split(), which PHP 7 removed.

       Mirrors the native function: $nr defaults to 1, a length below 1
       yields false, and an empty string yields array("").
    */
    function str_split($str, $nr = 1) {

         if($nr < 1) {
             return false;
         }

         $length = strlen($str);
         if($length == 0) {
             return array("");
         }

         $pieces = array();
         for($offset = 0; $offset < $length; $offset += $nr) {
             $pieces[] = substr($str, $offset, $nr);
         }

         return $pieces;

    }
}

// Seed dictionary shared by the encoder and decoder: code N (0-255) maps to
// the single character chr(N).
$startDictionary = array_map('chr', range(0, 255));


/**
 * Compresses a byte string with LZW.
 *
 * The dictionary starts from the global $startDictionary (expected to hold
 * codes 0-255, one per byte value, with consecutive integer keys) and gains
 * one new phrase each time a code is emitted.
 *
 * @param string $string Raw input bytes.
 * @return array The emitted codes in order, each as an unpadded string of
 *               binary digits (decbin() output). Empty for empty input.
 */
function encodeLZW($string) {
    global $startDictionary;

    $encodedString = array();

    $length = strlen($string);
    if($length == 0) {
        // Nothing to encode. Without this guard the final lookup below would
        // run for the empty phrase and emit a bogus code 0 (a NUL byte).
        return $encodedString;
    }

    // Phrase => code map, so every lookup is a hash probe instead of a
    // linear scan over the whole (growing) dictionary.
    $codes = array_flip($startDictionary);
    $nextCode = count($startDictionary);

    $word = "";
    for($i = 0; $i < $length; $i++) {

        $x = substr($string, $i, 1);
        $candidate = $word . $x;

        if(isset($codes[$candidate])) {

            // Still a known phrase: keep extending it.
            $word = $candidate;

        }else{

            // Longest known phrase found: emit its code, learn the
            // extended phrase, and restart from the current character.
            $encodedString[] = decbin($codes[$word]); //encode to binary string
            $codes[$candidate] = $nextCode++;
            $word = $x;

        }
    }

    // Flush the phrase still pending when the input ran out.
    $encodedString[] = decbin($codes[$word]);  //encode to binary string

    return $encodedString;

}

/**
 * Decompresses a bit string produced from encodeLZW() output.
 *
 * The dictionary is rebuilt on the fly from the global $startDictionary
 * (codes 0-255), mirroring what the encoder learned.
 *
 * @param string $string Concatenated fixed-width code words made of the
 *                       characters "0" and "1".
 * @param int    $bits   Width of one code word in bits.
 * @return string The decoded bytes; "" when there is nothing to decode, the
 *                width is not positive, or the first code is not a valid
 *                single-byte code.
 */
function decodeLZW($string, $bits) {
    global $startDictionary;

    $string = (string) $string;
    if($bits < 1 || $string === "") {
        return "";
    }

    $dictionary = $startDictionary;

    $tokens = str_split($string, $bits); //tokenize the string - split at every $bits, where $bits is the size of the bits used to encode one symbol.

    $first = bindec($tokens[0]);       //function bindec - decode binary string to a decimal
    if(!isset($dictionary[$first])) {
        // The stream must open with a seed code; anything else is malformed.
        return "";
    }

    $word = $dictionary[$first];
    $decodedString = $word;

    for($i = 1, $count = count($tokens); $i < $count; $i++) {

        $x = bindec($tokens[$i]);

        if(isset($dictionary[$x])) {

            $element = $dictionary[$x];

        }else{

            // The code refers to the phrase the encoder added in this very
            // step: the previous phrase plus its own first character.
            // isset() is required here - a truthiness test would misread the
            // decoded character "0" as a missing entry.
            $element = $word . $word[0];

        }

        $decodedString .= $element;

        // Learn the phrase the encoder learned at this point.
        $dictionary[] = $word . $element[0];
        $word = $element;

    }

    return $decodedString;
}




// Request handling: one of the two forms below posts either "encode" or
// "decode". The result is written back into $_POST so the forms can show it.
if(isset($_POST['encode'])) {

    $input = isset($_POST['toEncode']) ? $_POST['toEncode'] : "";

    // Undo magic-quotes escaping only where that feature exists and is on;
    // otherwise stripslashes() would delete genuine backslashes from the text.
    if(function_exists('get_magic_quotes_gpc') && @get_magic_quotes_gpc()) {
        $input = stripslashes($input);
    }

    $output = encodeLZW($input);        // start encoding

    // Uncompressed size: every input byte counted as a full 8-bit code word,
    // matching what the message below states.
    $before = strlen($input) * 8;                    //length before encoding.
    echo "input is $before bits long - encoded as 8 bit code words";

    $bits = pad($output);        //pad all the code words with zeros, and return the number of bits used

    //put code together, with the code word size stored in the first 16 bits
    $output_code = str_pad(decbin($bits), 16, "0", STR_PAD_LEFT).implode("", $output);

    echo "<br /><br />";
    $after = strlen($output_code);    //length after encoding.
    echo "output is $after bits long - encoded as $bits bit code words";

    // Hand the result to the decode form and clear the encode form.
    $_POST['toDecode'] = $output_code;
    $_POST['toEncode'] = "";

    // An empty input has no meaningful ratio (and would divide by zero).
    $ratio = $before > 0 ? ($after / $before * 100)."%" : "n/a";

    echo "<br /><Br />";

    echo "compression ratio: $ratio";

    echo "<br /><Br />";

}else if(isset($_POST['decode'])){

    $input = isset($_POST['toDecode']) ? $_POST['toDecode'] : "";

    // Keep only binary digits: stray whitespace or line breaks from a paste
    // would otherwise shift every code word boundary.
    $input = preg_replace('/[^01]/', '', $input);

    $bits = bindec((string) substr($input, 0, 16));    //the first 16 bits hold the code word size
    $input = (string) substr($input, 16);

    // Without a positive code word size and a payload there is nothing to decode.
    $output = ($bits > 0 && $input !== "") ? decodeLZW($input, $bits) : "";

    // Hand the result to the encode form and clear the decode form.
    $_POST['toEncode'] = $output;
    $_POST['toDecode'] ="";

}






?>

<?php
// Values echoed into the encode form. Both are HTML-escaped: PHP_SELF is
// attacker-influenced, and the text may contain "<", "&" or "</textarea>".
// A single-byte charset is named so that text which is not valid UTF-8 is
// still escaped byte-for-byte instead of being turned into an empty string.
$encodeFormAction = htmlspecialchars($_SERVER['PHP_SELF'], ENT_QUOTES, 'ISO-8859-1');
$encodeFormText = "";
if((isset($_POST['encode']) || isset($_POST['decode'])) && isset($_POST['toEncode'])) {
    $encodeFormText = htmlspecialchars($_POST['toEncode'], ENT_QUOTES, 'ISO-8859-1');
}
?>
<form method="post" action="<?=$encodeFormAction?>">

    <textarea rows="10" cols="100" name="toEncode" onfocus="this.select();"><?=$encodeFormText?></textarea>


    <input type="submit" name="encode" value="Koduoti" />

</form>
The encoded output will hopefully be shorter than the input!
<br /><br />
The first 16 bits of the output are used to store the number of bits used to encode each codeword.
<br /><br />
<?php
// Values echoed into the decode form. Both are HTML-escaped: PHP_SELF is
// attacker-influenced, and the posted text is untrusted. toDecode is absent
// on a plain page load, so it is checked before use.
$decodeFormAction = htmlspecialchars($_SERVER['PHP_SELF'], ENT_QUOTES, 'ISO-8859-1');
$decodeFormText = isset($_POST['toDecode'])
    ? htmlspecialchars($_POST['toDecode'], ENT_QUOTES, 'ISO-8859-1')
    : "";
?>
<form method="post" action="<?=$decodeFormAction?>">

    <textarea rows="10" cols="100" name="toDecode" onfocus="this.select();"><?=$decodeFormText?></textarea>


    <input type="submit" name="decode" value="Dekoduoti" />

</form>

<a name="fix"></a>
<div>



</div>

<?php } ?>

<?php /*<script src="http://www.google-analytics.com/urchin.js" type="text/javascript">
</script>
<script type="text/javascript">
_uacct = "UA-1032885-1";
urchinTracker();
</script>
*/
?>
</body>
</html>


